#project_dir <- snakemake@config$project_dir
project_dir <- '/project2/mstephens/ktayeb/logistic-susie-snakemake'
knitr::opts_knit$set(root.dir = project_dir)
library(ggcorrplot)
## Loading required package: ggplot2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reticulate)
library(tidyr)
library(tictoc)
library(ggplot2)
source('workflow/scripts/plotting/plot_functions.R')
# httpgd::hgd()
load_tbl <- function(path){
print(path)
dplyr::as_tibble(readRDS(path))
}
pip_summary_files <- Sys.glob('results/gsea/*/*_pip_summary.rds')
res_pip <- purrr::map_dfr(pip_summary_files, ~load_tbl(.x))
## [1] "results/gsea/msigdb_c1_5k/gibss_abf_default_pip_summary.rds"
## [1] "results/gsea/msigdb_c1_5k/gibss_abf_L1_pip_summary.rds"
## [1] "results/gsea/msigdb_c1_5k/gibss_labf_default_pip_summary.rds"
## [1] "results/gsea/msigdb_c1_5k/gibss_labf_L1_pip_summary.rds"
## [1] "results/gsea/msigdb_c1_5k/linear_susie_default_pip_summary.rds"
## [1] "results/gsea/msigdb_c1_5k/linear_susie_L1_pip_summary.rds"
## [1] "results/gsea/msigdb_h/gibss_abf_default_pip_summary.rds"
## [1] "results/gsea/msigdb_h/gibss_abf_L1_pip_summary.rds"
## [1] "results/gsea/msigdb_h/gibss_labf_default_pip_summary.rds"
## [1] "results/gsea/msigdb_h/gibss_labf_L1_pip_summary.rds"
## [1] "results/gsea/msigdb_h/linear_susie_default_pip_summary.rds"
## [1] "results/gsea/msigdb_h/linear_susie_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_med/gibss_abf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_med/gibss_abf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_med/gibss_labf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_med/gibss_labf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_med/linear_susie_default_pip_summary.rds"
## [1] "results/gsea/X_bin_med/linear_susie_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/gibss_abf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/gibss_abf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/gibss_labf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/gibss_labf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/linear_susie_default_pip_summary.rds"
## [1] "results/gsea/X_bin_strong/linear_susie_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/gibss_abf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/gibss_abf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/gibss_labf_default_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/gibss_labf_L1_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/linear_susie_default_pip_summary.rds"
## [1] "results/gsea/X_bin_weak/linear_susie_L1_pip_summary.rds"
cs_summary_files <- Sys.glob('results/gsea/*/*_cs_summary.rds')
res_cs <- purrr::map_dfr(cs_summary_files, ~dplyr::as_tibble(readRDS(.x)))
manifest <- readr::read_delim('config/gsea_sim/gsea_sim_manifest.tsv', delim='\t')
## Rows: 2880 Columns: 9
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): name, X, sim_hash, sim_id
## dbl (5): rep, beta0, beta, q, seed
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
threshold_var <- function(df, var, val, comp){
# var <- as_string(ensym(var))
filter(df, comp(.data[[var]], !!val))
}
cs_filtered_coverage <- function(res_cs, var, val, comp){
# counts number of causal variables
Q_tbl <- res_cs %>%
select(c(method, sim_id, q)) %>%
unique() %>%
group_by(method) %>%
summarize(Q = sum(q))
# coverage, and frequency of number of causal variants per CS
res_cs %>%
left_join(Q_tbl) %>%
threshold_var(var, val, comp) %>%
mutate(val = val) %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found2 = list(table(n_found)), mean_size = mean(size), val = val[1], Q=Q[1], n_found = sum(n_found)) %>%
unnest_wider(n_found2)
}
make_plot <- function(res_cs, var, vals, op){
# filter CSs based on `val` and `op`
f <- function(val){
res_cs %>%
cs_filtered_coverage(var, val, op)
}
# compute coverage at each `val` in `vals`
filtered_coverage <- purrr::map_dfr(vals, f)
# plot coverage as a function of val
a <- filtered_coverage %>%
ggplot(aes(x = val, y = coverage, color = method)) +
geom_path() +
geom_hline(yintercept = 0.95) +
xlab(var)
# plot power as a function of val
b <- filtered_coverage %>%
mutate(power = n_found/Q) %>%
mutate(power = `1`/Q) %>%
ggplot(aes(x = val, y = power, color = method)) +
geom_path() +
xlab(var)
cowplot::plot_grid(a, b)
}
R_weak <- readRDS('results/gsea/X_bin_weak/ld.rds')
ggcorrplot(R_weak[1:20, 1:20])
### Simulation settings
# show unique simulation settings for SER simulations
manifest %>%
dplyr::filter(X == 'X_bin_weak') %>%
select(c(X, beta0, beta)) %>%
unique() %>%
rmarkdown::paged_table()
# histogram of CSs, and coverage over all simulations
res_cs %>%
dplyr::filter(X == 'X_bin_weak') %>%
ggplot(aes(x = size)) +
geom_histogram() +
facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
res_cs %>%
dplyr::filter(X == 'X_bin_weak') %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 0 | 1 | 2 | 3 | 4 | 5 | mean_size |
|---|---|---|---|---|---|---|---|---|
| gibss_abf_L1 | 0.9916753 | 8 | 561 | 128 | 142 | 34 | 88 | 313.9199 |
| gibss_abf_default | 0.8185224 | 872 | 2569 | 164 | 620 | 132 | 448 | 594.2724 |
| gibss_labf_L1 | 0.9771072 | 22 | 577 | 120 | 132 | 12 | 98 | 358.5515 |
| gibss_labf_default | 0.8747138 | 602 | 2771 | 236 | 632 | 90 | 474 | 608.1682 |
| linear_susie_L1 | 0.9542144 | 44 | 615 | 114 | 106 | 18 | 64 | 266.3809 |
| linear_susie_default | 0.9125607 | 630 | 4295 | 180 | 1185 | 189 | 726 | 581.0319 |
The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.
res_cs %>%
dplyr::filter(X == 'X_bin_weak') %>%
filter(size < 200) %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 0 | 1 | 2 | 3 | 4 | 5 | mean_size |
|---|---|---|---|---|---|---|---|---|
| gibss_abf_L1 | 0.9933665 | 4 | 413 | 114 | 42 | 18 | 12 | 4.820895 |
| gibss_abf_default | 0.9953134 | 8 | 1661 | 26 | 6 | 4 | 2 | 3.011716 |
| gibss_labf_L1 | 1.0000000 | NA | 429 | 104 | 32 | NA | NA | 2.647788 |
| gibss_labf_default | 0.9987738 | 2 | 1599 | 24 | 4 | 2 | NA | 3.136726 |
| linear_susie_L1 | 0.9541985 | 30 | 499 | 98 | 24 | 4 | NA | 3.923664 |
| linear_susie_default | 0.9741766 | 69 | 2540 | 48 | 9 | 6 | NA | 2.500374 |
# filter on purity
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>%
dplyr::filter(X == 'X_bin_weak') %>%
make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on cs size
vals <- seq(5, 100, by=5)
res_cs %>%
dplyr::filter(X == 'X_bin_weak') %>%
make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on the SER-level BFs
# note multiple CSs may capture the same effect causing "power" > 1
vals <- seq(0, 100, by=5)
res_cs %>%
dplyr::filter(X == 'X_bin_weak') %>%
make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# fdp vs power
fdp_plot <- res_pip %>%
dplyr::filter(X == 'X_bin_weak') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal, color=method)) +
stat_fdp_power(geom='path', max_fdp=1.0) +
labs(x = 'FDP', y = 'Power')
fdp_plot
PIP calibration looks a little messy– may need more simulations to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs Wakefield ABF. TODO: add comparison to exact computation of the BF via quadrature.
pip_calibration_plot <- res_pip %>%
dplyr::filter(X == 'X_bin_weak') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal)) +
stat_pip_calibration() +
geom_abline(intercept = 0, slope = 1, color='red') +
theme_bw() +
labs(x='PIP', y='Frequency') +
facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 6 rows containing missing values (`geom_pointrange()`).
R_med <- readRDS('results/gsea/X_bin_med/ld.rds')
ggcorrplot(R_med[1:20, 1:20])
### Simulation settings
# show unique simulation settings for SER simulations
manifest %>%
dplyr::filter(X == 'X_bin_med') %>%
select(c(X, beta0, beta)) %>%
unique() %>%
rmarkdown::paged_table()
# histogram of CSs, and coverage over all simulations
res_cs %>%
dplyr::filter(X == 'X_bin_med') %>%
ggplot(aes(x = size)) +
geom_histogram() +
facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
res_cs %>%
dplyr::filter(X == 'X_bin_med') %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 0 | 1 | 2 | 3 | 4 | 5 | mean_size |
|---|---|---|---|---|---|---|---|---|
| gibss_abf_L1 | 0.9667014 | 32 | 582 | 150 | 108 | 33 | 56 | 238.0094 |
| gibss_abf_default | 0.7993757 | 964 | 2582 | 160 | 566 | 117 | 416 | 567.3617 |
| gibss_labf_L1 | 0.9667014 | 32 | 602 | 132 | 110 | 18 | 67 | 273.2466 |
| gibss_labf_default | 0.8863684 | 546 | 2798 | 212 | 726 | 118 | 405 | 581.8418 |
| linear_susie_L1 | 0.9542144 | 44 | 670 | 114 | 75 | 20 | 38 | 211.9542 |
| linear_susie_default | 0.9029840 | 699 | 4299 | 198 | 1195 | 129 | 685 | 559.6916 |
The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.
res_cs %>%
dplyr::filter(X == 'X_bin_med') %>%
filter(size < 200) %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 0 | 1 | 2 | 3 | 4 | 5 | mean_size |
|---|---|---|---|---|---|---|---|---|
| gibss_abf_L1 | 0.9593614 | 28 | 466 | 138 | 34 | 21 | 2 | 5.586357 |
| gibss_abf_default | 0.9535386 | 86 | 1726 | 26 | 8 | 1 | 4 | 2.776337 |
| gibss_labf_L1 | 0.9785605 | 14 | 494 | 122 | 14 | 4 | 5 | 5.797856 |
| gibss_labf_default | 0.9886942 | 20 | 1704 | 32 | 4 | 2 | 7 | 2.704353 |
| linear_susie_L1 | 0.9445215 | 40 | 562 | 98 | 19 | 2 | NA | 5.775312 |
| linear_susie_default | 0.9672535 | 93 | 2667 | 45 | 19 | 12 | 4 | 4.014789 |
# filter on purity
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>%
dplyr::filter(X == 'X_bin_med') %>%
make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on cs size
vals <- seq(5, 100, by=5)
res_cs %>%
dplyr::filter(X == 'X_bin_med') %>%
make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on the SER-level BFs
# note multiple CSs may capture the same effect causing "power" > 1
vals <- seq(0, 100, by=5)
res_cs %>%
dplyr::filter(X == 'X_bin_med') %>%
make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# fdp vs power
fdp_plot <- res_pip %>%
dplyr::filter(X == 'X_bin_med') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal, color=method)) +
stat_fdp_power(geom='path', max_fdp=1.0) +
labs(x = 'FDP', y = 'Power')
fdp_plot
PIP calibration looks a little messy– may need more simulations to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs Wakefield ABF. TODO: add comparison to exact computation of the BF via quadrature.
pip_calibration_plot <- res_pip %>%
dplyr::filter(X == 'X_bin_med') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal)) +
stat_pip_calibration() +
geom_abline(intercept = 0, slope = 1, color='red') +
theme_bw() +
labs(x='PIP', y='Frequency') +
facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 6 rows containing missing values (`geom_pointrange()`).
R_strong <- readRDS('results/gsea/X_bin_strong/ld.rds')
ggcorrplot(R_strong[1:100, 1:100])
# show unique simulation settings for SER simulations
manifest %>%
dplyr::filter(X == 'X_bin_strong') %>%
select(c(X, beta0, beta)) %>%
unique() %>%
rmarkdown::paged_table()
# histogram of CSs, and coverage over all simulations
res_cs %>%
filter(X == 'X_bin_strong') %>%
ggplot(aes(x = size)) +
geom_histogram() +
facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
res_cs %>%
dplyr::filter(X == 'X_bin_strong') %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 0 | 1 | 2 | 3 | 4 | 5 | mean_size |
|---|---|---|---|---|---|---|---|---|
| gibss_abf_L1 | 0.9041667 | 138 | 879 | 210 | 144 | 24 | 45 | 202.3250 |
| gibss_abf_default | 0.8295833 | 1227 | 4053 | 372 | 837 | 162 | 549 | 554.0187 |
| gibss_labf_L1 | 0.9312500 | 99 | 969 | 156 | 135 | 24 | 57 | 232.3063 |
| gibss_labf_default | 0.9362500 | 459 | 4443 | 402 | 1137 | 213 | 546 | 575.1846 |
| linear_susie_L1 | 0.8750000 | 180 | 981 | 117 | 96 | 30 | 36 | 188.8500 |
| linear_susie_default | 0.9191667 | 582 | 4464 | 249 | 1167 | 162 | 576 | 550.1612 |
The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.
res_cs %>%
dplyr::filter(X == 'X_bin_strong') %>%
filter(size < 200) %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 0 | 1 | 2 | 3 | 4 | 5 | mean_size |
|---|---|---|---|---|---|---|---|---|
| gibss_abf_L1 | 0.8805556 | 129 | 708 | 189 | 48 | 3 | 3 | 18.255556 |
| gibss_abf_default | 0.8970134 | 300 | 2505 | 81 | 24 | 3 | NA | 10.173018 |
| gibss_labf_L1 | 0.9190751 | 84 | 783 | 138 | 30 | 3 | NA | 14.283237 |
| gibss_labf_default | 0.9646409 | 96 | 2535 | 78 | 6 | NA | NA | 8.215470 |
| linear_susie_L1 | 0.8463612 | 171 | 816 | 102 | 15 | 9 | NA | 12.765499 |
| linear_susie_default | 0.8970438 | 303 | 2571 | 57 | 12 | NA | NA | 8.835882 |
# filter on purity
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>%
dplyr::filter(X == 'X_bin_strong') %>%
make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on cs size
vals <- seq(5, 100, by=5)
res_cs %>%
dplyr::filter(X == 'X_bin_strong') %>%
make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on the SER-level BFs
# note multiple CSs may capture the same effect causing "power" > 1
vals <- seq(0, 100, by=5)
res_cs %>%
dplyr::filter(X == 'X_bin_strong') %>%
make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# fdp vs power
fdp_plot <- res_pip %>%
dplyr::filter(X == 'X_bin_strong') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal, color=method)) +
stat_fdp_power(geom='path', max_fdp=1.0) +
labs(x = 'FDP', y = 'Power')
fdp_plot
PIP calibration looks a little messy– may need more simulations to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs Wakefield ABF. TODO: add comparison to exact computation of the BF via quadrature.
pip_calibration_plot <- res_pip %>%
dplyr::filter(X == 'X_bin_strong') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal)) +
stat_pip_calibration() +
geom_abline(intercept = 0, slope = 1, color='red') +
theme_bw() +
labs(x='PIP', y='Frequency') +
facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 6 rows containing missing values (`geom_pointrange()`).
R_strong <- readRDS('results/gsea/msigdb_c2_5k/ld.rds')
ggcorrplot(R_strong[1:100, 1:100])
# show unique simulation settings for SER simulations
manifest %>%
dplyr::filter(X == 'msigdb_c2_5k') %>%
select(c(X, beta0, beta)) %>%
unique() %>%
rmarkdown::paged_table()
# histogram of CSs, and coverage over all simulations
res_cs %>%
filter(X == 'msigdb_c2_5k') %>%
ggplot(aes(x = size)) +
geom_histogram() +
facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
res_cs %>%
dplyr::filter(X == 'msigdb_c2_5k') %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 1 | 3 | 2 | mean_size |
|---|---|---|---|---|---|
| gibss_abf_L1 | 1 | 1 | NA | NA | 1.0 |
| gibss_abf_default | 1 | 5 | NA | NA | 19.6 |
| gibss_labf_L1 | 1 | 1 | NA | NA | 1.0 |
| gibss_labf_default | 1 | 4 | 1 | NA | 19.2 |
| linear_susie_L1 | 1 | 1 | NA | NA | 1.0 |
| linear_susie_default | 1 | 3 | NA | 2 | 19.8 |
The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.
res_cs %>%
dplyr::filter(X == 'msigdb_c2_5k') %>%
filter(size < 200) %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 1 | 3 | 2 | mean_size |
|---|---|---|---|---|---|
| gibss_abf_L1 | 1 | 1 | NA | NA | 1.0 |
| gibss_abf_default | 1 | 5 | NA | NA | 19.6 |
| gibss_labf_L1 | 1 | 1 | NA | NA | 1.0 |
| gibss_labf_default | 1 | 4 | 1 | NA | 19.2 |
| linear_susie_L1 | 1 | 1 | NA | NA | 1.0 |
| linear_susie_default | 1 | 3 | NA | 2 | 19.8 |
# filter on purity
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>%
dplyr::filter(X == 'msigdb_c2_5k') %>%
make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on cs size
vals <- seq(5, 100, by=5)
res_cs %>%
dplyr::filter(X == 'msigdb_c2_5k') %>%
make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on the SER-level BFs
# note multiple CSs may capture the same effect causing "power" > 1
vals <- seq(0, 100, by=5)
res_cs %>%
dplyr::filter(X == 'msigdb_c2_5k') %>%
make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# fdp vs power
fdp_plot <- res_pip %>%
dplyr::filter(X == 'msigdb_c2_5k') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal, color=method)) +
stat_fdp_power(geom='path', max_fdp=1.0) +
labs(x = 'FDP', y = 'Power')
fdp_plot
PIP calibration looks a little messy– may need more simulations to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs Wakefield ABF. TODO: add comparison to exact computation of the BF via quadrature.
pip_calibration_plot <- res_pip %>%
dplyr::filter(X == 'msigdb_c2_5k') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal)) +
stat_pip_calibration() +
geom_abline(intercept = 0, slope = 1, color='red') +
theme_bw() +
labs(x='PIP', y='Frequency') +
facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 1 rows containing missing values (`geom_pointrange()`).
R_strong <- readRDS('results/gsea/msigdb_h/ld.rds')
ggcorrplot(R_strong[1:20, 1:20])
# show unique simulation settings for SER simulations
manifest %>%
dplyr::filter(X == 'msigdb_h') %>%
select(c(X, beta0, beta)) %>%
unique() %>%
rmarkdown::paged_table()
# histogram of CSs, and coverage over all simulations
res_cs %>%
filter(X == 'msigdb_h') %>%
ggplot(aes(x = size)) +
geom_histogram() +
facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
res_cs %>%
dplyr::filter(X == 'msigdb_h') %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 0 | 1 | 2 | 3 | 4 | 5 | mean_size |
|---|---|---|---|---|---|---|---|---|
| gibss_abf_L1 | 0.9750000 | 12 | 331 | 60 | 45 | 9 | 23 | 13.21458 |
| gibss_abf_default | 0.8513570 | 356 | 1334 | 139 | 289 | 105 | 172 | 30.58246 |
| gibss_labf_L1 | 0.9875000 | 6 | 334 | 43 | 49 | 12 | 36 | 16.64792 |
| gibss_labf_default | 0.9219207 | 187 | 1373 | 163 | 359 | 115 | 198 | 31.56451 |
| linear_susie_L1 | 0.9687500 | 15 | 352 | 33 | 48 | 5 | 27 | 13.93542 |
| linear_susie_default | 0.9637500 | 87 | 1476 | 56 | 476 | 51 | 254 | 30.53875 |
The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.
res_cs %>%
dplyr::filter(X == 'msigdb_h') %>%
filter(size < 200) %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 0 | 1 | 2 | 3 | 4 | 5 | mean_size |
|---|---|---|---|---|---|---|---|---|
| gibss_abf_L1 | 0.9750000 | 12 | 331 | 60 | 45 | 9 | 23 | 13.21458 |
| gibss_abf_default | 0.8513570 | 356 | 1334 | 139 | 289 | 105 | 172 | 30.58246 |
| gibss_labf_L1 | 0.9875000 | 6 | 334 | 43 | 49 | 12 | 36 | 16.64792 |
| gibss_labf_default | 0.9219207 | 187 | 1373 | 163 | 359 | 115 | 198 | 31.56451 |
| linear_susie_L1 | 0.9687500 | 15 | 352 | 33 | 48 | 5 | 27 | 13.93542 |
| linear_susie_default | 0.9637500 | 87 | 1476 | 56 | 476 | 51 | 254 | 30.53875 |
# filter on purity
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>%
dplyr::filter(X == 'msigdb_h') %>%
make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on cs size
vals <- seq(5, 100, by=5)
res_cs %>%
dplyr::filter(X == 'msigdb_h') %>%
make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on the SER-level BFs
# note multiple CSs may capture the same effect causing "power" > 1
vals <- seq(0, 100, by=5)
res_cs %>%
dplyr::filter(X == 'msigdb_h') %>%
make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# fdp vs power
fdp_plot <- res_pip %>%
dplyr::filter(X == 'msigdb_h') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal, color=method)) +
stat_fdp_power(geom='path', max_fdp=1.0) +
labs(x = 'FDP', y = 'Power')
fdp_plot
PIP calibration looks a little messy– may need more simulations to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs Wakefield ABF. TODO: add comparison to exact computation of the BF via quadrature.
pip_calibration_plot <- res_pip %>%
dplyr::filter(X == 'msigdb_h') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal)) +
stat_pip_calibration() +
geom_abline(intercept = 0, slope = 1, color='red') +
theme_bw() +
labs(x='PIP', y='Frequency') +
facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 5 rows containing missing values (`geom_pointrange()`).
R_strong <- readRDS('results/gsea/msigdb_h/ld.rds')
ggcorrplot(R_strong[1:20, 1:20])
# show unique simulation settings for SER simulations
manifest %>%
dplyr::filter(X == 'msigdb_h') %>%
select(c(X, beta0, beta)) %>%
unique() %>%
rmarkdown::paged_table()
# histogram of CSs, and coverage over all simulations
res_cs %>%
filter(X == 'msigdb_h') %>%
ggplot(aes(x = size)) +
geom_histogram() +
facet_wrap(vars(method))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
res_cs %>%
dplyr::filter(X == 'msigdb_h') %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 0 | 1 | 2 | 3 | 4 | 5 | mean_size |
|---|---|---|---|---|---|---|---|---|
| gibss_abf_L1 | 0.9750000 | 12 | 331 | 60 | 45 | 9 | 23 | 13.21458 |
| gibss_abf_default | 0.8513570 | 356 | 1334 | 139 | 289 | 105 | 172 | 30.58246 |
| gibss_labf_L1 | 0.9875000 | 6 | 334 | 43 | 49 | 12 | 36 | 16.64792 |
| gibss_labf_default | 0.9219207 | 187 | 1373 | 163 | 359 | 115 | 198 | 31.56451 |
| linear_susie_L1 | 0.9687500 | 15 | 352 | 33 | 48 | 5 | 27 | 13.93542 |
| linear_susie_default | 0.9637500 | 87 | 1476 | 56 | 476 | 51 | 254 | 30.53875 |
The Laplace ABF gives lower coverage marginally over all CSs and all simulation scenarios. This is resolved by filtering out large CSs, which are largely uninformative.
res_cs %>%
dplyr::filter(X == 'msigdb_h') %>%
filter(size < 200) %>%
group_by(method) %>%
summarize(coverage = mean(covered), n_found = list(table(n_found)), mean_size = mean(size)) %>%
unnest_wider(n_found) %>%
knitr::kable()
| method | coverage | 0 | 1 | 2 | 3 | 4 | 5 | mean_size |
|---|---|---|---|---|---|---|---|---|
| gibss_abf_L1 | 0.9750000 | 12 | 331 | 60 | 45 | 9 | 23 | 13.21458 |
| gibss_abf_default | 0.8513570 | 356 | 1334 | 139 | 289 | 105 | 172 | 30.58246 |
| gibss_labf_L1 | 0.9875000 | 6 | 334 | 43 | 49 | 12 | 36 | 16.64792 |
| gibss_labf_default | 0.9219207 | 187 | 1373 | 163 | 359 | 115 | 198 | 31.56451 |
| linear_susie_L1 | 0.9687500 | 15 | 352 | 33 | 48 | 5 | 27 | 13.93542 |
| linear_susie_default | 0.9637500 | 87 | 1476 | 56 | 476 | 51 | 254 | 30.53875 |
# filter on purity
vals <- c(seq(0.1, 0.9, by = 0.1), seq(0.91, .99, by = 0.01))
res_cs %>%
dplyr::filter(X == 'msigdb_h') %>%
make_plot('purity', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on cs size
vals <- seq(5, 100, by=5)
res_cs %>%
dplyr::filter(X == 'msigdb_h') %>%
make_plot('size', vals, `<`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# filter on the SER-level BFs
# note multiple CSs may capture the same effect causing "power" > 1
vals <- seq(0, 100, by=5)
res_cs %>%
dplyr::filter(X == 'msigdb_h') %>%
make_plot('lbf_ser', vals, `>`)
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Joining with `by = join_by(method)`
## Don't know how to automatically pick scale for object of type <table>. Defaulting to continuous.
# fdp vs power
fdp_plot <- res_pip %>%
dplyr::filter(X == 'msigdb_h') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal, color=method)) +
stat_fdp_power(geom='path', max_fdp=1.0) +
labs(x = 'FDP', y = 'Power')
fdp_plot
PIP calibration looks a little messy– may need more simulations to get a clear picture. Note that for the logistic SERs the only source of error here is the approximation error of using the Laplace ABF vs Wakefield ABF. TODO: add comparison to exact computation of the BF via quadrature.
pip_calibration_plot <- res_pip %>%
dplyr::filter(X == 'msigdb_h') %>%
unnest_longer(c(pip, causal)) %>%
ggplot(aes(pip, causal)) +
stat_pip_calibration() +
geom_abline(intercept = 0, slope = 1, color='red') +
theme_bw() +
labs(x='PIP', y='Frequency') +
facet_wrap(vars(method))
pip_calibration_plot
## Warning: Removed 5 rows containing missing values (`geom_pointrange()`).